/*
 * Author: tdanford
 * Date: Jan 7, 2009
 */
package edu.mit.csail.cgs.sigma.expression.workflow;

import java.util.*;
import java.util.regex.*;
import java.io.*;

import edu.mit.csail.cgs.ewok.verbs.ExpanderIterator;
import edu.mit.csail.cgs.ewok.verbs.Filter;
import edu.mit.csail.cgs.ewok.verbs.FilterIterator;
import edu.mit.csail.cgs.ewok.verbs.Mapper;
import edu.mit.csail.cgs.ewok.verbs.MapperIterator;
import edu.mit.csail.cgs.sigma.CloseableHandler;
import edu.mit.csail.cgs.sigma.ConcatenatingIterator;
import edu.mit.csail.cgs.sigma.FilePrinter;
import edu.mit.csail.cgs.sigma.IteratorCacher;
import edu.mit.csail.cgs.sigma.expression.normalization.QuantileNormalization;
import edu.mit.csail.cgs.sigma.expression.rma.HRMA;
import edu.mit.csail.cgs.sigma.expression.segmentation.*;
import edu.mit.csail.cgs.sigma.expression.segmentation.dpalgos.ConsistentMultiChannelSegmenter;
import edu.mit.csail.cgs.sigma.expression.segmentation.dpalgos.MultiChannelSegmenter;
import edu.mit.csail.cgs.sigma.expression.segmentation.dpalgos.SharingMultiChannelSegmenter;
import edu.mit.csail.cgs.sigma.expression.segmentation.fitters.ExponentialFitter;
import edu.mit.csail.cgs.sigma.expression.segmentation.fitters.FlatFitter;
import edu.mit.csail.cgs.sigma.expression.segmentation.fitters.LineFitter;
import edu.mit.csail.cgs.sigma.expression.segmentation.fitters.LineInterceptFitter;
import edu.mit.csail.cgs.sigma.expression.segmentation.fitters.Priors;
import edu.mit.csail.cgs.sigma.expression.segmentation.fitters.SegmentFitter;
import edu.mit.csail.cgs.sigma.expression.segmentation.fitters.ZeroGaussianFitter;
import edu.mit.csail.cgs.sigma.expression.segmentation.sharing.AllOrNothingSharingFactory;
import edu.mit.csail.cgs.sigma.expression.segmentation.sharing.ParameterSharingFactory;
import edu.mit.csail.cgs.sigma.expression.transcription.*;
import edu.mit.csail.cgs.sigma.expression.transcription.fitters.ClusterFitter;
import edu.mit.csail.cgs.sigma.expression.transcription.fitters.MaxLikeClusterFitter;
import edu.mit.csail.cgs.sigma.expression.transcription.fitters.TAFit;
import edu.mit.csail.cgs.sigma.expression.transcription.identifiers.ExhaustiveIdentifier;
import edu.mit.csail.cgs.sigma.expression.transcription.identifiers.TranscriptIdentifier;
import edu.mit.csail.cgs.sigma.expression.workflow.models.Chunk;
import edu.mit.csail.cgs.sigma.expression.workflow.models.DataSegment;
import edu.mit.csail.cgs.sigma.expression.workflow.models.FileInputData;
import edu.mit.csail.cgs.sigma.expression.workflow.models.InputSegmentation;
import edu.mit.csail.cgs.sigma.expression.workflow.models.ProbeLine;
import edu.mit.csail.cgs.sigma.expression.workflow.models.TranscriptCall;
import edu.mit.csail.cgs.utils.iterators.SingleIterator;
import edu.mit.csail.cgs.utils.json.JSONObject;
import edu.mit.csail.cgs.utils.models.FileTimer;
import edu.mit.csail.cgs.utils.models.Model;
import edu.mit.csail.cgs.utils.models.Timer;

/**
 * Workflow is a wrapper, or a main, class -- it has the ability to run all the other
 * Workflow* segments in their correct order.  It figures out what the next step to 
 * run is, based on the input file's filename.
 * 
 *  In order, the input files are:
 *  (0) *.raw 
 *  (1) *.data
 *  (2) *.chunks
 *  (3) *.packages
 *  (4) *.segments
 *  (4a) *.datasegs
 *  
 *  (5) *.clusters
 *  (6) *.transcripts
 *  
 * Each of these is generated by successive calls to this class, Workflow, on the 
 * previous filetype -- with the exception of the .data files, which are generated
 * by the WorkflowDataGenerator class.
 * 
 * @author tdanford
 *
 */
public class Workflow {

	/**
	 * Command-line entry point: runs one workflow step on a single input file.
	 * 
	 * With at least one argument, the first argument names the input file and the 
	 * full argument array is handed to the Workflow.  With no arguments, a built-in 
	 * default file (resolved against the properties' directory) and a built-in 
	 * default argument list are used instead.
	 * 
	 * @param args  optional command-line arguments; args[0] is the input file name
	 */
	public static void main(String[] args) {
		WorkflowProperties props = new WorkflowProperties();
		// Alternative default input: "txns288c_negative.datasegs"
		String defaultInputName = "txns288c_plus.datasegs";

		File input;
		if(args.length == 0) { 
			// Nothing on the command line: fall back to the default input file 
			// and the default argument list.
			input = new File(props.getDirectory(), defaultInputName);
			args = new String[] { defaultInputName, "0", "7" };
		} else { 
			input = new File(args[0]);
		}

		Workflow worker = new Workflow(props, args);
		try {
			worker.processFile(input);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/** Properties consulted by the individual workflow steps. */
	private WorkflowProperties props;
	/** Private copy of the arguments supplied at construction; never null. */
	private String[] args;
	
	/**
	 * Creates a workflow runner.
	 * 
	 * @param ps  the workflow properties to use
	 * @param as  the arguments for this run; the array is copied, and a null 
	 *            array is treated as an empty argument list
	 */
	public Workflow(WorkflowProperties ps, String... as) { 
		props = ps;
		if(as == null) { 
			args = new String[0];
		} else { 
			args = as.clone();
		}
	}
	
	/**
	 * Chains the in-memory steps from probe lines to data segments: 
	 * processData, processChunks, processPackages (with a null timer) and 
	 * finally processSegmentations.
	 * 
	 * @param input     the probe lines to segment
	 * @param key       the workflow key passed to each step
	 * @param exptName  the experiment name passed to processSegmentations
	 * @param params    the segmentation parameters
	 * @return an iterator over the resulting data segments
	 */
	public Iterator<DataSegment> completeSegmentation(Iterator<ProbeLine> input, String key, String exptName, SegmentationParameters params) { 
		Iterator<Chunk> chunks = processData(input);
		Iterator<FileInputData> packages = processChunks(chunks, key);
		Iterator<InputSegmentation> segmentations = processPackages(packages, key, params, null);
		return processSegmentations(segmentations, params, key, exptName);
	}
	
	/**
	 * Same pipeline as completeSegmentation, except that the probe lines are 
	 * not split up: noChunksDataProcessing places all of them in a single Chunk.
	 * 
	 * @param input     the probe lines to segment
	 * @param key       the workflow key passed to each step
	 * @param exptName  the experiment name passed to processSegmentations
	 * @param params    the segmentation parameters
	 * @return an iterator over the resulting data segments
	 */
	public Iterator<DataSegment> completeNoChunksSegmentation(Iterator<ProbeLine> input, String key, String exptName, SegmentationParameters params) { 
		Iterator<Chunk> singleChunk = noChunksDataProcessing(input);
		Iterator<FileInputData> packages = processChunks(singleChunk, key);
		Iterator<InputSegmentation> segmentations = processPackages(packages, key, params, null);
		return processSegmentations(segmentations, params, key, exptName);
	}
	
	/**
	 * Runs the whole in-memory pipeline from probe lines to transcript calls: 
	 * completeSegmentation, then processDatasegments (with null channels), then 
	 * processClusters (with a null timer).
	 * 
	 * @param input    the probe lines to process
	 * @param key      the workflow key
	 * @param sparams  the segmentation parameters
	 * @param tparams  the transcription parameters
	 * @param expt     the experiment name used during segmentation
	 * @return an iterator over the transcript calls
	 */
	public Iterator<TranscriptCall> completeCalling(Iterator<ProbeLine> input, String key, SegmentationParameters sparams, TranscriptionParameters tparams, String expt) {
		Iterator<DataSegment> segments = completeSegmentation(input, key, expt, sparams);
		Iterator<Cluster> clusters = processDatasegments(segments, null);
		return processClusters(clusters, tparams, null, key);
	}
	
	/**
	 * Produces transcript calls from already-computed data segments: 
	 * processDatasegments (with null channels) followed by processClusters 
	 * (with a null timer).
	 * 
	 * @param input    the data segments to cluster
	 * @param key      the workflow key
	 * @param tparams  the transcription parameters
	 * @param expt     not used by this overload
	 * @return an iterator over the transcript calls
	 */
	public Iterator<TranscriptCall> completeCalling(Iterator<DataSegment> input, String key, TranscriptionParameters tparams, String expt) {
		Iterator<Cluster> clusters = processDatasegments(input, null);
		return processClusters(clusters, tparams, null, key);
	}
	
	/**
	 * data -> chunks: wraps the probe lines in a WorkflowChunker built with 
	 * this workflow's properties.
	 * 
	 * @param input  the probe lines
	 * @return the chunker, as an iterator over Chunks
	 */
	public Iterator<Chunk> processData(Iterator<ProbeLine> input) { 
		return new WorkflowChunker(props, input);
	}
	
	/**
	 * Alternative to processData that performs no chunking: the input is 
	 * drained completely and all of its probe lines are placed in one Chunk.
	 * 
	 * @param input  the probe lines; fully consumed by this call
	 * @return an iterator yielding the single Chunk
	 */
	public Iterator<Chunk> noChunksDataProcessing(Iterator<ProbeLine> input) { 
		LinkedList<ProbeLine> allLines = new LinkedList<ProbeLine>();
		for(Iterator<ProbeLine> itr = input; itr.hasNext(); ) { 
			allLines.add(itr.next());
		}
		Chunk everything = new Chunk(allLines);
		return new SingleIterator<Chunk>(everything);
	}
	
	/**
	 * chunks -> packages: wraps the chunks in a WorkflowPackager configured 
	 * with the input channels that the properties report for the given key.
	 * 
	 * @param input  the chunks
	 * @param key    the workflow key used to look up the input channels
	 * @return the packager, as an iterator over FileInputData
	 */
	public Iterator<FileInputData> processChunks(Iterator<Chunk> input, String key) { 
		return new WorkflowPackager(props.getInputChannels(key), input);
	}
	
	/**
	 * packages -> segments: builds the two segment fitters and the segmenter, 
	 * and wraps the input packages in a WorkflowSegmentation that uses them.
	 * 
	 * @param input  the packages to segment
	 * @param key    the workflow key, used to look up the channel indexing
	 * @param p      the segmentation parameters (probabilities and penalties)
	 * @param timer  handed to the WorkflowSegmentation; callers in this class 
	 *               sometimes pass null
	 * @return an iterator over the segmentations
	 */
	public Iterator<InputSegmentation> processPackages(Iterator<FileInputData> input, String key, SegmentationParameters p, Timer timer) { 
		// The flat model gets whatever probability is left after split and line.
		double flatProbability = 1.0-p.probSplit-p.probLine;
		int minimum = props.getDefaultMinimum();

		double softness = p.flatVarPenalty;
		double cutoff = p.flatIntensityPenalty;

		// "Background" fitter: a flat segment with a logistic noise prior.
		// (Earlier alternatives, now disabled: ExponentialFitter, ZeroGaussianFitter.)
		Priors.LogisticNoisePrior noisePrior = new Priors.LogisticNoisePrior(softness, cutoff);
		SegmentFitter background = new FlatFitter(Math.log(flatProbability), Priors.zero, noisePrior);

		// "Foreground" fitter: a line with intercept and a logistic signal prior.
		// (Earlier alternatives for the priors, now disabled: gammaPrior, expPrior.)
		Priors.LogisticSignalPrior signalPrior = new Priors.LogisticSignalPrior(softness, cutoff);
		SegmentFitter foreground = new LineInterceptFitter(
				Math.log(p.probLine),
				signalPrior,
				Priors.positive(Priors.zero),
				Priors.multiplePrior(-p.lineVarPenalty));

		// A SharingMultiChannelSegmenter was used here previously; the consistent 
		// segmenter over all channels of the indexing is the current choice.
		Segmenter segmenter = new ConsistentMultiChannelSegmenter(
				p,
				props.getIndexing(key).allChannels(),
				background, foreground);

		return new WorkflowSegmentation(minimum, timer, segmenter, input);
	}
	
	/**
	 * segments -> datasegs: converts segmentations into data segments via a 
	 * WorkflowDataSegmenter wrapped in an IteratorCacher, and optionally runs 
	 * an hRMA pass over them first.
	 * 
	 * When segParams.doHRMA is set, an HRMA is built from one iterator of the 
	 * cache, sampled 10 times, and asked to save differentials between the 
	 * channels of this key's strain and those of the opposite strain for the 
	 * given experiment.  In either case a fresh iterator from the cache is returned.
	 * 
	 * @param input      the segmentations
	 * @param segParams  the segmentation parameters (only doHRMA is read here)
	 * @param key        the workflow key
	 * @param exptName   the experiment name used to find channels for hRMA
	 * @return an iterator over the data segments
	 */
	public Iterator<DataSegment> processSegmentations(Iterator<InputSegmentation> input, SegmentationParameters segParams, String key, String exptName) { 
		IteratorCacher<DataSegment> cache = new IteratorCacher<DataSegment>(
				new WorkflowDataSegmenter(props, key, input));

		// The strain is resolved up front, whether or not hRMA is requested.
		String strain = props.parseStrainFromKey(key);
		String oppStrain;
		if(strain.equals("s288c")) { 
			oppStrain = "sigma";
		} else { 
			oppStrain = "s288c";
		}

		if(!segParams.doHRMA) { 
			return cache.iterator();
		}

		System.out.println("Performing hRMA analysis...");
		HRMA hrma = new HRMA(cache.iterator());
		hrma.sample(10);

		WorkflowIndexing indexing = props.getIndexing(key);
		Integer[] foreground = indexing.findChannels(strain, exptName);
		Integer[] background = indexing.findChannels(oppStrain, exptName);
		hrma.saveDifferentials(foreground, background);

		return cache.iterator();
	}
	
	/**
	 * datasegs -> clusters: wraps the data segments in a WorkflowClusterer.
	 * 
	 * @param input     the data segments
	 * @param channels  the channels handed to the clusterer; callers in this 
	 *                  class pass null
	 * @return the clusterer, as an iterator over Clusters
	 */
	public Iterator<Cluster> processDatasegments(Iterator<DataSegment> input, Integer[] channels) { 
		return new WorkflowClusterer(channels, input);
	}
	
	/**
	 * clusters -> transcripts: fits every cluster with a MaxLikeClusterFitter 
	 * and returns the resulting transcript calls.
	 * 
	 * The gamma blocks are currently fixed at { {0}, {1} }.  Their entries are 
	 * flattened into a sorted channel array, and each block entry is then 
	 * rewritten as its position in that array.  The channels and the rewritten 
	 * blocks are printed to standard output.
	 * 
	 * @param input   the clusters to fit
	 * @param params  the transcription parameters
	 * @param time    handed to the WorkflowTranscriptCaller; may be null
	 * @param key     the workflow key
	 * @return an iterator over the transcript calls
	 */
	public Iterator<TranscriptCall> processClusters(Iterator<Cluster> input, TranscriptionParameters params, Timer time, String key) { 
		// The indexing is still looked up, but its blocks() are no longer used 
		// for the gamma blocks (that alternative is disabled).
		WorkflowIndexing indexing = props.getIndexing(key);
		Integer[][] gammas = new Integer[][] { { 0 }, { 1 } };

		// Flatten every block into one array of channels, then sort it.
		int total = 0;
		for(Integer[] block : gammas) { 
			total += block.length;
		}
		Integer[] channels = new Integer[total];
		int offset = 0;
		for(Integer[] block : gammas) { 
			System.arraycopy(block, 0, channels, offset, block.length);
			offset += block.length;
		}
		Arrays.sort(channels);

		// Replace each channel in the blocks by its index in the sorted array.
		for(Integer[] block : gammas) { 
			for(int j = 0; j < block.length; j++) { 
				block[j] = findIndex(channels, block[j]);
			}
		}

		System.out.println("Workflow.processClusters()");
		StringBuilder channelLine = new StringBuilder("\tChannels:");
		for(Integer channel : channels) { 
			channelLine.append(" ").append(channel);
		}
		System.out.println(channelLine.toString());

		System.out.println("\tGamma Blocks:");
		for(int i = 0; i < gammas.length; i++) { 
			StringBuilder blockLine = new StringBuilder("\t\t" + i + ":");
			for(Integer index : gammas[i]) { 
				blockLine.append(" ").append(index);
			}
			System.out.println(blockLine.toString());
		}

		ClusterFitter fitter = new MaxLikeClusterFitter(props, params, channels, gammas);
		return new WorkflowTranscriptCaller(fitter, time, input);
	}
	
	/**
	 * Returns the position of the first element of chs that equals v.
	 * 
	 * @param chs  the array to search
	 * @param v    the value to look for
	 * @return the index of the first matching element
	 * @throws IllegalArgumentException if no element of chs equals v
	 */
	private int findIndex(Integer[] chs, Integer v) { 
		int idx = 0;
		while(idx < chs.length && !chs[idx].equals(v)) { 
			idx++;
		}
		if(idx == chs.length) { 
			throw new IllegalArgumentException(String.format("%d not found in channel array", v));
		}
		return idx;
	}
	
	/**
	 * Runs the one workflow step selected by the type of <code>input</code> and 
	 * writes the result to a new file in the same directory.
	 * 
	 * The steps, by input type, are: raw -> data, data -> chunks, 
	 * chunks -> packages, packages -> segments, segments -> datasegs, 
	 * datasegs -> clusters and clusters -> transcripts.
	 * 
	 * If both the .packages and the .segments file of this workflow already 
	 * exist and the IncompletePackageLoader reports unfinished packages, the 
	 * step is forced to "packages", only the unfinished packages are read, and 
	 * the output file is opened in append mode.
	 * 
	 * @param input  the file to process; its name determines the workflow name, 
	 *               key and type
	 * @return the output file that was written, or null if the type of the 
	 *         input file is not one of the recognized types
	 * @throws IOException if reading the input or writing the output fails
	 */
	public File processFile(File input) throws IOException { 
		String workflowName = props.parseWorkflowName(input);
		String key = props.keyFromWorkflowName(workflowName);
		String type = props.parseWorkflowType(input);
		
		System.out.println("type: \"" + type + "\"");
		System.out.println("Input: " + input.getName());
		File dir = input.getParentFile();
		
		Iterator<JSONObject> json = null;
		File output = null;

		IncompletePackageLoader incomplete = null;
		File packageFile = new File(dir, String.format("%s.packages", workflowName));
		File segmentFile = new File(dir, String.format("%s.segments", workflowName));
		boolean append = false; // a flag -- should we append, or replace, the output file?
		
		if(packageFile.exists() && segmentFile.exists()) { 
			// First, we check to see if we've got an incomplete segments file.  
			incomplete = new IncompletePackageLoader(packageFile, segmentFile);
			if(incomplete.size() == 0) { 
				incomplete = null;
			} else { 
				type = "packages";
				append = true;
				System.out.println(String.format("Attempting to finish segmentation (%d pkg)",
						incomplete.size()));
			}
		}
		
		// The clusters step opens its input stream here rather than inside a 
		// reader, so this method is responsible for closing it once the (lazy) 
		// pipeline has been drained by outputJSON.
		FileInputStream clusterStream = null;
		
		try { 
			// Constant-first comparisons: a null type falls through to the 
			// "unrecognized" branch instead of raising a NullPointerException.
			if("raw".equals(type)) { 
				// raw -> data (written directly as text lines, not as JSON)
				output = new File(dir, String.format("%s.%s", workflowName, "data"));
				writeNormalizedData(input, output, key);

			} else if("data".equals(type)) { 
				// data -> chunks
				output = new File(dir, String.format("%s.%s", workflowName, "chunks"));

				WorkflowDataLoader loader = new WorkflowDataLoader(input);
				Iterator<Chunk> chunker = processData(loader);

				json = new MapperIterator<Chunk,JSONObject>(
						new ModelToJSON<Chunk>(), chunker);

			} else if("chunks".equals(type)) { 
				// chunks -> packages
				output = new File(dir, String.format("%s.%s", workflowName, "packages"));

				WorkflowChunkReader reader = new WorkflowChunkReader(input);
				Iterator<FileInputData> packages = processChunks(reader, key);
				
				json = new MapperIterator<FileInputData,JSONObject>(
						new ModelToJSON<FileInputData>(), packages);
				
			} else if("packages".equals(type)) { 
				// packages -> segments
				output = new File(dir, String.format("%s.%s", workflowName, "segments"));
				File timing = new File(dir, String.format("%s.timing", output.getName()));

				SegmentationParameters p = props.getDefaultSegmentationParameters();

				// When resuming, only the unfinished packages are segmented.
				Iterator<FileInputData> reader =
					incomplete != null ? incomplete : 
					new WorkflowPackageReader(input);

				// NOTE(review): this step filters by chromosome only for exactly 
				// two arguments, whereas the clusters step uses ">= 2" -- confirm 
				// whether the difference is intended.
				if(args.length == 2) { 
					String chromFilter = args[1];
					output = new File(dir, String.format("%s_chr%s.%s", workflowName, chromFilter, "segments"));
					// output already carries the chromosome suffix here, so the 
					// timing file name contains it twice.
					timing = new File(dir, String.format("%s_chr%s.timing", output.getName(), chromFilter));
					reader = new FilterIterator<FileInputData,FileInputData>(new FileInputDataChromFilter(chromFilter), reader);
				}
				
				Timer time = new FileTimer(timing, true);
				Iterator<InputSegmentation> segmentation = 
					processPackages(reader, key, p, time);
				
				json = new MapperIterator<InputSegmentation,JSONObject>(
						new ModelToJSON<InputSegmentation>(), segmentation);
				
			} else if("segments".equals(type)) {
				// segments -> datasegs
				output = new File(dir, String.format("%s.%s", workflowName, "datasegs"));
				String expt = "matalpha";

				SegmentationParameters p = props.getDefaultSegmentationParameters();

				WorkflowSegmentationReader reader = new WorkflowSegmentationReader(input);
				Iterator<DataSegment> dseger = processSegmentations(reader, p, key, expt);
				
				json = new MapperIterator<DataSegment,JSONObject>(
						new ModelToJSON<DataSegment>(), dseger);
				
			} else if("datasegs".equals(type)) { 
				// datasegs -> clusters; the experiment name only affects the 
				// name of the output file.
				String expt = args.length >= 2 ? args[1] : "matalpha";
				
				output = new File(dir, String.format("%s_%s.%s", workflowName, expt, "clusters"));

				WorkflowDataSegmentReader reader = new WorkflowDataSegmentReader(input);
				Iterator<Cluster> clusterer = processDatasegments(reader, null);
				
				json = new MapperIterator<Cluster,JSONObject>(
						new ModelToJSON<Cluster>(), clusterer);
				
			} else if("clusters".equals(type)) { 
				// clusters -> transcripts
				output = new File(dir, String.format("%s.%s", workflowName, "transcripts"));
				File timing = new File(dir, String.format("%s.timing", output.getName()));
				
				clusterStream = new FileInputStream(input);
				Iterator<Cluster> reader = new WorkflowClusterReader(clusterStream);
				
				TranscriptionParameters params = props.getDefaultTranscriptionParameters();
				
				if(args.length >= 2) { 
					String chromFilter = args[1];
					output = new File(dir, String.format("%s_chr%s.%s", 
							workflowName, chromFilter, "transcripts"));
					timing = new File(dir, String.format("%s_chr%s.timing", 
							output.getName(), chromFilter));

					reader = new FilterIterator<Cluster,Cluster>(
							new ClusterChromFilter(chromFilter), reader);
				}
				
				Timer time = new FileTimer(timing, true);
				Iterator<TranscriptCall> caller = processClusters(reader, params, time, key);

				json = new MapperIterator<TranscriptCall,JSONObject>(
						new ModelToJSON<TranscriptCall>(), caller);
			} else { 
				// Unrecognized file type: deliberately not an error, there is 
				// simply no next step to run.
				return null;
			}

			if(json != null) { 
				outputJSON(json, output, append);
			}
		} finally { 
			if(clusterStream != null) { 
				try { 
					clusterStream.close();
				} catch(IOException ignored) { 
					// The stream was only read from; a failure to close it 
					// cannot affect the output and must not mask another exception.
				}
			}
		}
		System.out.println("Output: " + output.getAbsolutePath());
		
		return output;
	}
	
	/**
	 * raw -> data: loads the raw probe data, quantile-normalizes it over the 
	 * blocks of the key's indexing, and writes one probe line per text line.
	 * 
	 * @param input   the raw input file
	 * @param output  the data file to create (an existing file is replaced)
	 * @param key     the workflow key used to look up the indexing
	 * @throws IOException if the output cannot be opened or written
	 */
	private void writeNormalizedData(File input, File output, String key) throws IOException { 
		WorkflowIndexing index = props.getIndexing(key);

		WorkflowDataLoader loader = new WorkflowDataLoader(input);
		QuantileNormalization norm = new QuantileNormalization();
		norm.load(loader);
		norm.normalize(index.blocks()); // this isn't quite right.

		PrintStream ps = new PrintStream(new FileOutputStream(output));
		try { 
			Iterator<ProbeLine> itr = norm.iterator();
			while(itr.hasNext()) { 
				ProbeLine line = itr.next();
				ps.println(line.toString());
			}
			// PrintStream records write failures instead of throwing them.
			if(ps.checkError()) { 
				throw new IOException(String.format("Error writing %s", output.getAbsolutePath()));
			}
		} finally { 
			ps.close();
		}
	}
	
	/**
	 * Writes every JSON object of the iterator to the given file, one object 
	 * per line, and prints the number of objects written to standard output.
	 * 
	 * The file is always closed, even if the iterator throws while it is being 
	 * drained.  Because PrintStream records write failures instead of throwing 
	 * them, the stream's error flag is checked after the last object so that a 
	 * truncated file is reported rather than silently accepted.
	 * 
	 * @param json    the objects to write; fully consumed by this call
	 * @param output  the file to write to
	 * @param append  true to append to an existing file, false to replace it
	 * @throws IOException if the file cannot be opened, or if a write failed
	 */
	public void outputJSON(Iterator<JSONObject> json, File output, boolean append) throws IOException {
		PrintStream ps = new PrintStream(new FileOutputStream(output, append));
		int count = 0;
		try { 
			while(json.hasNext()) { 
				ps.println(json.next().toString());
				count += 1;
			}
			// checkError() also flushes, so it reflects every write made above.
			if(ps.checkError()) { 
				throw new IOException(String.format(
						"Error writing JSON output to %s", output.getAbsolutePath()));
			}
		} finally { 
			ps.close();
		}
		System.out.println("#JSON Output: " + count);
	}
	
	/**
	 * Mapper that turns a Model into its JSON form by calling asJSON() on it.
	 * 
	 * @param <T>  the kind of Model being converted
	 */
	public static class ModelToJSON<T extends Model> implements Mapper<T,JSONObject> {
		
		/**
		 * @param a  the model to convert
		 * @return the value of a.asJSON()
		 */
		public JSONObject execute(T a) {
			JSONObject converted = a.asJSON();
			return converted;
		} 
	}
	
	/**
	 * Filter that lets a FileInputData through only when its chrom field 
	 * equals the chromosome given at construction.
	 */
	public static class FileInputDataChromFilter implements Filter<FileInputData,FileInputData> { 
		private String chrom;
		
		/**
		 * @param c  the chromosome to keep
		 */
		public FileInputDataChromFilter(String c) { 
			chrom = c;
		}
		
		/**
		 * @param d  the package to test
		 * @return d itself if it is on the wanted chromosome, otherwise null
		 */
		public FileInputData execute(FileInputData d) { 
			if(d.chrom.equals(chrom)) { 
				return d;
			}
			return null;
		}
	}

	/**
	 * Filter that lets a Cluster through only when its chrom() equals the 
	 * chromosome given at construction.
	 */
	public static class ClusterChromFilter implements Filter<Cluster,Cluster> { 
		private String chrom;
		
		/**
		 * @param c  the chromosome to keep
		 */
		public ClusterChromFilter(String c) { 
			chrom = c;
		}
		
		/**
		 * @param d  the cluster to test
		 * @return d itself if it is on the wanted chromosome, otherwise null
		 */
		public Cluster execute(Cluster d) { 
			if(d.chrom().equals(chrom)) { 
				return d;
			}
			return null;
		}
	}
}
